## R code for reproducing the analyses in the article :
## "What Drives Package Authors to Participate in the R Project for Statistical Computing? Exploring Motivation, Values, and Work Design"

## ----------------------- preliminaries -----------------------------------
require("lattice")
require("ltm")
require("simex")
require("memisc")
require("xtable")
require("MASS")
require("effects")
load("RMotivation.rda")    
RMotivation <- na.omit(RMotivation)
psychometrics <- c("wtask", "wsocial", "wknowledge", "mextrinsic", "mhybrid", "mintrinsic", "vuniversalism", "vpower", "vselfdirection")  
psychometricsSE <- paste0(psychometrics, ".se")
demographics <- c("phd", "statseduc", "fulltime", "academia", "statswork")


## -------------------------- examining possible non-response bias ----------------------------
freqsamp <- table(RMotivation$npkgs)                                   ## sample frequencies
sfreqsplot <- c(freqsamp[3:10], sum(freqsamp[11:length(freqsamp)]))    ## conditional relative frequencies (sample)
samp2_10 <- round(prop.table(sfreqsplot), 3) * 100                     ## percentages
pop2_10 <- structure(c(50.9, 18.3, 10.3, 7.1, 3, 2, 1.4, 0.8, 6.3), .Names = c(2:9, "10+"))  ## population percentages

## --- plot sample vs. population proportions (Fig. S1)
plot(2:10, samp2_10, type = "b", xlab = "Number of Packages", ylab = "Relative Frequencies [%]", 
     ylim = c(0,55), main = "Sample vs. Population", xaxt = "n")
points(2:10, pop2_10, type = "b", col = "black", lty = 2, lwd = 1, pch = 2)
legend("topright", legend = c("Sample", "Population"), col = 1, lty = 1:2, pch = 1:2)
axis(1, at = 2:10, labels = c(2:9, "10+"))


## --------------------------- descriptive analysis ----------------------------
## histogram: number of packages (Fig. S2)
hist(RMotivation$npkgs, main = "Histogram R Packages", xlab = "Number of R Packages", breaks = -1:max(RMotivation$npkgs, na.rm = TRUE) + 0.5)


## --------------------------- regression models -------------------------------
## --- negative-binomial regression: number of packages
if(file.exists("models-npkgs.rda")) {
  load("models-npkgs.rda")
} else {
formulaGLMnpkgs <- as.formula(paste("npkgs ~", paste(c(psychometrics, demographics), collapse = " +")))
fitnpkgsNB <- glm.nb(formulaGLMnpkgs, RMotivation, x = TRUE, y = TRUE)
fitnpkgs <- glm(formulaGLMnpkgs, RMotivation, family = negative.binomial(fitnpkgsNB$theta), x = TRUE, y = TRUE)

## stepwise selection
fitnpkgsStep <- step(fitnpkgs, formulaGLMnpkgs, trace = 0)
fitnpkgsStepplot <- fitnpkgsStep

## SIMEX versions
ME <- RMotivation[, psychometricsSE]
fitnpkgsSimex <- simex(fitnpkgs, SIMEXvariable = psychometrics, measurement.error = ME, asymptotic = FALSE) 
psychoind <- psychometrics %in% names(coef(fitnpkgsStep))
ME1 <- RMotivation[, psychometricsSE[psychoind]]
fitnpkgsStepSimex <- simex(fitnpkgsStep, SIMEXvariable = psychometrics[psychoind], measurement.error = ME1, asymptotic = FALSE) 
fitnpkgs$xlevels <- NULL
fitnpkgsStep$xlevels <- NULL

## save results
save(fitnpkgs, fitnpkgsSimex, fitnpkgsStep, fitnpkgsStepSimex, fitnpkgsStepplot, file = "models-npkgs.rda")
}

## regression table (Table S1)
tabnpkgs <- mtable(
  "Full (ML)" = fitnpkgs,
  "Full (SIMEX)" = fitnpkgsSimex,
  "Step (ML)" = fitnpkgsStep,
  "Step (SIMEX)" = fitnpkgsStepSimex,
  summary.stats = FALSE) 
toLatex(tabnpkgs)

## effect plots (Fig. S3)
plot(allEffects(fitnpkgsStepplot), ylab = "Number of packages", type = "response", ylim = c(1.6, 4))


## --- logistic regression (participation in lists)
if(file.exists("models-lists.rda")) {
  load("models-lists.rda")
} else {
## formula
formulaGLMlists <- as.formula(paste("lists ~", paste(c(psychometrics, demographics), collapse = " +")))

## full logistic regression
fitlists <- glm(formulaGLMlists, RMotivation, family = binomial(), x = TRUE, y = TRUE)

## stepwise selection
fitlistsStep <- step(fitlists, formulaGLMlists, trace = 0)
fitlistsStepplot <- fitlistsStep

## SIMEX versions
ME <- RMotivation[, psychometricsSE]
fitlistsSimex <- simex(fitlists, SIMEXvariable = psychometrics, measurement.error = ME, asymptotic = FALSE) 
psychoind <- psychometrics %in% names(coef(fitlistsStep))
ME1 <- RMotivation[, psychometricsSE[psychoind]]
fitlistsStepSimex <- simex(fitlistsStep, SIMEXvariable = psychometrics[psychoind], measurement.error = ME1, asymptotic = FALSE) 
fitlists$xlevels <- NULL
fitlistsStep$xlevels <- NULL

## save results
save(fitlists, fitlistsSimex, fitlistsStep, fitlistsStepSimex, fitlistsStepplot, file = "models-lists.rda")
}

## regression table (Table S2)
tablists <- mtable(
  "Full (ML)" = fitlists,
  "Full (SIMEX)" = fitlistsSimex,
  "Step (ML)" = fitlistsStep,
  "Step (SIMEX)" = fitlistsStepSimex,
  summary.stats = FALSE)  
toLatex(tablists)


## effect plots (Fig. S4)
plot(allEffects(fitlistsStepplot), ylab = "Probability of mailing list participation", type = "response", ylim = c(0.35, 0.75))


## --- logistic regression (conference participation)
if(file.exists("models-meet.rda")) {
  load("models-meet.rda")
} else {
## formula
formulaGLMmeet <- as.formula(paste("meet ~", paste(c(psychometrics, demographics), collapse = " +")))

## full logistic regression
fitmeet <- glm(formulaGLMmeet, RMotivation, family = binomial(), x = TRUE, y = TRUE)

## stepwise selection
fitmeetStep <- step(fitmeet, formulaGLMmeet, trace = 0)
fitmeetStepplot <- fitmeetStep

## SIMEX versions
ME <- RMotivation[, psychometricsSE]
fitmeetSimex <- simex(fitmeet, SIMEXvariable = psychometrics, measurement.error = ME, asymptotic = FALSE) 
psychoind <- psychometrics %in% names(coef(fitmeetStep))
ME1 <- RMotivation[, psychometricsSE[psychoind]]
fitmeetStepSimex <- simex(fitmeetStep, SIMEXvariable = psychometrics[psychoind], measurement.error = ME1, asymptotic = FALSE) 
fitmeet$xlevels <- NULL
fitmeetStep$xlevels <- NULL

## save results
save(fitmeet, fitmeetSimex, fitmeetStep, fitmeetStepSimex, fitmeetStepplot, file = "models-meet.rda")
}


## regression table (Table S3)
tabmeet <- mtable(
  "Full (ML)" = fitmeet,
  "Full (SIMEX)" = fitmeetSimex,
  "Step (ML)" = fitmeetStep,
  "Step (SIMEX)" = fitmeetStepSimex,
  summary.stats = FALSE) 
toLatex(tabmeet)


## effect plots (Fig. S5)
plot(allEffects(fitmeetStepplot), ylab = "Probability of conference participation", type = "response", ylim = c(0.13, 0.45))
